1 Reading in of all infos for one file

We will focus on control siRNA_1 as a starter

list.files("kallisto_results/", recursive = TRUE)
##   [1] "v100/Control_siRNA_1/abundance.tsv" "v100/Control_siRNA_1/gene_expr.csv"
##   [3] "v100/Control_siRNA_2/abundance.tsv" "v100/Control_siRNA_2/gene_expr.csv"
##   [5] "v100/Control_siRNA_3/abundance.tsv" "v100/Control_siRNA_3/gene_expr.csv"
##   [7] "v100/STAT5A_siRNA_1/abundance.tsv"  "v100/STAT5A_siRNA_1/gene_expr.csv" 
##   [9] "v100/STAT5A_siRNA_2/abundance.tsv"  "v100/STAT5A_siRNA_2/gene_expr.csv" 
##  [11] "v100/STAT5A_siRNA_3/abundance.tsv"  "v100/STAT5A_siRNA_3/gene_expr.csv" 
##  [13] "v100/STAT5B_siRNA_1/abundance.tsv"  "v100/STAT5B_siRNA_1/gene_expr.csv" 
##  [15] "v100/STAT5B_siRNA_2/abundance.tsv"  "v100/STAT5B_siRNA_2/gene_expr.csv" 
##  [17] "v100/STAT5B_siRNA_3/abundance.tsv"  "v100/STAT5B_siRNA_3/gene_expr.csv" 
##  [19] "v101/Control_siRNA_1/abundance.tsv" "v101/Control_siRNA_1/gene_expr.csv"
##  [21] "v101/Control_siRNA_2/abundance.tsv" "v101/Control_siRNA_2/gene_expr.csv"
##  [23] "v101/Control_siRNA_3/abundance.tsv" "v101/Control_siRNA_3/gene_expr.csv"
##  [25] "v101/STAT5A_siRNA_1/abundance.tsv"  "v101/STAT5A_siRNA_1/gene_expr.csv" 
##  [27] "v101/STAT5A_siRNA_2/abundance.tsv"  "v101/STAT5A_siRNA_2/gene_expr.csv" 
##  [29] "v101/STAT5A_siRNA_3/abundance.tsv"  "v101/STAT5A_siRNA_3/gene_expr.csv" 
##  [31] "v101/STAT5B_siRNA_1/abundance.tsv"  "v101/STAT5B_siRNA_1/gene_expr.csv" 
##  [33] "v101/STAT5B_siRNA_2/abundance.tsv"  "v101/STAT5B_siRNA_2/gene_expr.csv" 
##  [35] "v101/STAT5B_siRNA_3/abundance.tsv"  "v101/STAT5B_siRNA_3/gene_expr.csv" 
##  [37] "v102/Control_siRNA_1/abundance.tsv" "v102/Control_siRNA_1/gene_expr.csv"
##  [39] "v102/Control_siRNA_2/abundance.tsv" "v102/Control_siRNA_2/gene_expr.csv"
##  [41] "v102/Control_siRNA_3/abundance.tsv" "v102/Control_siRNA_3/gene_expr.csv"
##  [43] "v102/STAT5A_siRNA_1/abundance.tsv"  "v102/STAT5A_siRNA_1/gene_expr.csv" 
##  [45] "v102/STAT5A_siRNA_2/abundance.tsv"  "v102/STAT5A_siRNA_2/gene_expr.csv" 
##  [47] "v102/STAT5A_siRNA_3/abundance.tsv"  "v102/STAT5A_siRNA_3/gene_expr.csv" 
##  [49] "v102/STAT5B_siRNA_1/abundance.tsv"  "v102/STAT5B_siRNA_1/gene_expr.csv" 
##  [51] "v102/STAT5B_siRNA_2/abundance.tsv"  "v102/STAT5B_siRNA_2/gene_expr.csv" 
##  [53] "v102/STAT5B_siRNA_3/abundance.tsv"  "v102/STAT5B_siRNA_3/gene_expr.csv" 
##  [55] "v103/Control_siRNA_1/abundance.tsv" "v103/Control_siRNA_1/gene_expr.csv"
##  [57] "v103/Control_siRNA_2/abundance.tsv" "v103/Control_siRNA_2/gene_expr.csv"
##  [59] "v103/Control_siRNA_3/abundance.tsv" "v103/Control_siRNA_3/gene_expr.csv"
##  [61] "v103/STAT5A_siRNA_1/abundance.tsv"  "v103/STAT5A_siRNA_1/gene_expr.csv" 
##  [63] "v103/STAT5A_siRNA_2/abundance.tsv"  "v103/STAT5A_siRNA_2/gene_expr.csv" 
##  [65] "v103/STAT5A_siRNA_3/abundance.tsv"  "v103/STAT5A_siRNA_3/gene_expr.csv" 
##  [67] "v103/STAT5B_siRNA_1/abundance.tsv"  "v103/STAT5B_siRNA_1/gene_expr.csv" 
##  [69] "v103/STAT5B_siRNA_2/abundance.tsv"  "v103/STAT5B_siRNA_2/gene_expr.csv" 
##  [71] "v103/STAT5B_siRNA_3/abundance.tsv"  "v103/STAT5B_siRNA_3/gene_expr.csv" 
##  [73] "v104/Control_siRNA_1/abundance.tsv" "v104/Control_siRNA_1/gene_expr.csv"
##  [75] "v104/Control_siRNA_2/abundance.tsv" "v104/Control_siRNA_2/gene_expr.csv"
##  [77] "v104/Control_siRNA_3/abundance.tsv" "v104/Control_siRNA_3/gene_expr.csv"
##  [79] "v104/STAT5A_siRNA_1/abundance.tsv"  "v104/STAT5A_siRNA_1/gene_expr.csv" 
##  [81] "v104/STAT5A_siRNA_2/abundance.tsv"  "v104/STAT5A_siRNA_2/gene_expr.csv" 
##  [83] "v104/STAT5A_siRNA_3/abundance.tsv"  "v104/STAT5A_siRNA_3/gene_expr.csv" 
##  [85] "v104/STAT5B_siRNA_1/abundance.tsv"  "v104/STAT5B_siRNA_1/gene_expr.csv" 
##  [87] "v104/STAT5B_siRNA_2/abundance.tsv"  "v104/STAT5B_siRNA_2/gene_expr.csv" 
##  [89] "v104/STAT5B_siRNA_3/abundance.tsv"  "v104/STAT5B_siRNA_3/gene_expr.csv" 
##  [91] "v105/Control_siRNA_1/abundance.tsv" "v105/Control_siRNA_1/gene_expr.csv"
##  [93] "v105/Control_siRNA_2/abundance.tsv" "v105/Control_siRNA_2/gene_expr.csv"
##  [95] "v105/Control_siRNA_3/abundance.tsv" "v105/Control_siRNA_3/gene_expr.csv"
##  [97] "v105/STAT5A_siRNA_1/abundance.tsv"  "v105/STAT5A_siRNA_1/gene_expr.csv" 
##  [99] "v105/STAT5A_siRNA_2/abundance.tsv"  "v105/STAT5A_siRNA_2/gene_expr.csv" 
## [101] "v105/STAT5A_siRNA_3/abundance.tsv"  "v105/STAT5A_siRNA_3/gene_expr.csv" 
## [103] "v105/STAT5B_siRNA_1/abundance.tsv"  "v105/STAT5B_siRNA_1/gene_expr.csv" 
## [105] "v105/STAT5B_siRNA_2/abundance.tsv"  "v105/STAT5B_siRNA_2/gene_expr.csv" 
## [107] "v105/STAT5B_siRNA_3/abundance.tsv"  "v105/STAT5B_siRNA_3/gene_expr.csv" 
## [109] "v106/Control_siRNA_1/abundance.tsv" "v106/Control_siRNA_1/gene_expr.csv"
## [111] "v106/Control_siRNA_2/abundance.tsv" "v106/Control_siRNA_2/gene_expr.csv"
## [113] "v106/Control_siRNA_3/abundance.tsv" "v106/Control_siRNA_3/gene_expr.csv"
## [115] "v106/STAT5A_siRNA_1/abundance.tsv"  "v106/STAT5A_siRNA_1/gene_expr.csv" 
## [117] "v106/STAT5A_siRNA_2/abundance.tsv"  "v106/STAT5A_siRNA_2/gene_expr.csv" 
## [119] "v106/STAT5A_siRNA_3/abundance.tsv"  "v106/STAT5A_siRNA_3/gene_expr.csv" 
## [121] "v106/STAT5B_siRNA_1/abundance.tsv"  "v106/STAT5B_siRNA_1/gene_expr.csv" 
## [123] "v106/STAT5B_siRNA_2/abundance.tsv"  "v106/STAT5B_siRNA_2/gene_expr.csv" 
## [125] "v106/STAT5B_siRNA_3/abundance.tsv"  "v106/STAT5B_siRNA_3/gene_expr.csv" 
## [127] "v107/Control_siRNA_1/abundance.tsv" "v107/Control_siRNA_1/gene_expr.csv"
## [129] "v107/Control_siRNA_2/abundance.tsv" "v107/Control_siRNA_2/gene_expr.csv"
## [131] "v107/Control_siRNA_3/abundance.tsv" "v107/Control_siRNA_3/gene_expr.csv"
## [133] "v107/STAT5A_siRNA_1/abundance.tsv"  "v107/STAT5A_siRNA_1/gene_expr.csv" 
## [135] "v107/STAT5A_siRNA_2/abundance.tsv"  "v107/STAT5A_siRNA_2/gene_expr.csv" 
## [137] "v107/STAT5A_siRNA_3/abundance.tsv"  "v107/STAT5A_siRNA_3/gene_expr.csv" 
## [139] "v107/STAT5B_siRNA_1/abundance.tsv"  "v107/STAT5B_siRNA_1/gene_expr.csv" 
## [141] "v107/STAT5B_siRNA_2/abundance.tsv"  "v107/STAT5B_siRNA_2/gene_expr.csv" 
## [143] "v107/STAT5B_siRNA_3/abundance.tsv"  "v107/STAT5B_siRNA_3/gene_expr.csv" 
## [145] "v108/Control_siRNA_1/abundance.tsv" "v108/Control_siRNA_1/gene_expr.csv"
## [147] "v108/Control_siRNA_2/abundance.tsv" "v108/Control_siRNA_2/gene_expr.csv"
## [149] "v108/Control_siRNA_3/abundance.tsv" "v108/Control_siRNA_3/gene_expr.csv"
## [151] "v108/STAT5A_siRNA_1/abundance.tsv"  "v108/STAT5A_siRNA_1/gene_expr.csv" 
## [153] "v108/STAT5A_siRNA_2/abundance.tsv"  "v108/STAT5A_siRNA_2/gene_expr.csv" 
## [155] "v108/STAT5A_siRNA_3/abundance.tsv"  "v108/STAT5A_siRNA_3/gene_expr.csv" 
## [157] "v108/STAT5B_siRNA_1/abundance.tsv"  "v108/STAT5B_siRNA_1/gene_expr.csv" 
## [159] "v108/STAT5B_siRNA_2/abundance.tsv"  "v108/STAT5B_siRNA_2/gene_expr.csv" 
## [161] "v108/STAT5B_siRNA_3/abundance.tsv"  "v108/STAT5B_siRNA_3/gene_expr.csv" 
## [163] "v109/Control_siRNA_1/abundance.tsv" "v109/Control_siRNA_1/gene_expr.csv"
## [165] "v109/Control_siRNA_2/abundance.tsv" "v109/Control_siRNA_2/gene_expr.csv"
## [167] "v109/Control_siRNA_3/abundance.tsv" "v109/Control_siRNA_3/gene_expr.csv"
## [169] "v109/STAT5A_siRNA_1/abundance.tsv"  "v109/STAT5A_siRNA_1/gene_expr.csv" 
## [171] "v109/STAT5A_siRNA_2/abundance.tsv"  "v109/STAT5A_siRNA_2/gene_expr.csv" 
## [173] "v109/STAT5A_siRNA_3/abundance.tsv"  "v109/STAT5A_siRNA_3/gene_expr.csv" 
## [175] "v109/STAT5B_siRNA_1/abundance.tsv"  "v109/STAT5B_siRNA_1/gene_expr.csv" 
## [177] "v109/STAT5B_siRNA_2/abundance.tsv"  "v109/STAT5B_siRNA_2/gene_expr.csv" 
## [179] "v109/STAT5B_siRNA_3/abundance.tsv"  "v109/STAT5B_siRNA_3/gene_expr.csv" 
## [181] "v86/Control_siRNA_1/abundance.tsv"  "v86/Control_siRNA_1/gene_expr.csv" 
## [183] "v86/Control_siRNA_2/abundance.tsv"  "v86/Control_siRNA_2/gene_expr.csv" 
## [185] "v86/Control_siRNA_3/abundance.tsv"  "v86/Control_siRNA_3/gene_expr.csv" 
## [187] "v86/STAT5A_siRNA_1/abundance.tsv"   "v86/STAT5A_siRNA_1/gene_expr.csv"  
## [189] "v86/STAT5A_siRNA_2/abundance.tsv"   "v86/STAT5A_siRNA_2/gene_expr.csv"  
## [191] "v86/STAT5A_siRNA_3/abundance.tsv"   "v86/STAT5A_siRNA_3/gene_expr.csv"  
## [193] "v86/STAT5B_siRNA_1/abundance.tsv"   "v86/STAT5B_siRNA_1/gene_expr.csv"  
## [195] "v86/STAT5B_siRNA_2/abundance.tsv"   "v86/STAT5B_siRNA_2/gene_expr.csv"  
## [197] "v86/STAT5B_siRNA_3/abundance.tsv"   "v86/STAT5B_siRNA_3/gene_expr.csv"  
## [199] "v87/Control_siRNA_1/abundance.tsv"  "v87/Control_siRNA_1/gene_expr.csv" 
## [201] "v87/Control_siRNA_2/abundance.tsv"  "v87/Control_siRNA_2/gene_expr.csv" 
## [203] "v87/Control_siRNA_3/abundance.tsv"  "v87/Control_siRNA_3/gene_expr.csv" 
## [205] "v87/STAT5A_siRNA_1/abundance.tsv"   "v87/STAT5A_siRNA_1/gene_expr.csv"  
## [207] "v87/STAT5A_siRNA_2/abundance.tsv"   "v87/STAT5A_siRNA_2/gene_expr.csv"  
## [209] "v87/STAT5A_siRNA_3/abundance.tsv"   "v87/STAT5A_siRNA_3/gene_expr.csv"  
## [211] "v87/STAT5B_siRNA_1/abundance.tsv"   "v87/STAT5B_siRNA_1/gene_expr.csv"  
## [213] "v87/STAT5B_siRNA_2/abundance.tsv"   "v87/STAT5B_siRNA_2/gene_expr.csv"  
## [215] "v87/STAT5B_siRNA_3/abundance.tsv"   "v87/STAT5B_siRNA_3/gene_expr.csv"  
## [217] "v88/Control_siRNA_1/abundance.tsv"  "v88/Control_siRNA_1/gene_expr.csv" 
## [219] "v88/Control_siRNA_2/abundance.tsv"  "v88/Control_siRNA_2/gene_expr.csv" 
## [221] "v88/Control_siRNA_3/abundance.tsv"  "v88/Control_siRNA_3/gene_expr.csv" 
## [223] "v88/STAT5A_siRNA_1/abundance.tsv"   "v88/STAT5A_siRNA_1/gene_expr.csv"  
## [225] "v88/STAT5A_siRNA_2/abundance.tsv"   "v88/STAT5A_siRNA_2/gene_expr.csv"  
## [227] "v88/STAT5A_siRNA_3/abundance.tsv"   "v88/STAT5A_siRNA_3/gene_expr.csv"  
## [229] "v88/STAT5B_siRNA_1/abundance.tsv"   "v88/STAT5B_siRNA_1/gene_expr.csv"  
## [231] "v88/STAT5B_siRNA_2/abundance.tsv"   "v88/STAT5B_siRNA_2/gene_expr.csv"  
## [233] "v88/STAT5B_siRNA_3/abundance.tsv"   "v88/STAT5B_siRNA_3/gene_expr.csv"  
## [235] "v89/Control_siRNA_1/abundance.tsv"  "v89/Control_siRNA_1/gene_expr.csv" 
## [237] "v89/Control_siRNA_2/abundance.tsv"  "v89/Control_siRNA_2/gene_expr.csv" 
## [239] "v89/Control_siRNA_3/abundance.tsv"  "v89/Control_siRNA_3/gene_expr.csv" 
## [241] "v89/STAT5A_siRNA_1/abundance.tsv"   "v89/STAT5A_siRNA_1/gene_expr.csv"  
## [243] "v89/STAT5A_siRNA_2/abundance.tsv"   "v89/STAT5A_siRNA_2/gene_expr.csv"  
## [245] "v89/STAT5A_siRNA_3/abundance.tsv"   "v89/STAT5A_siRNA_3/gene_expr.csv"  
## [247] "v89/STAT5B_siRNA_1/abundance.tsv"   "v89/STAT5B_siRNA_1/gene_expr.csv"  
## [249] "v89/STAT5B_siRNA_2/abundance.tsv"   "v89/STAT5B_siRNA_2/gene_expr.csv"  
## [251] "v89/STAT5B_siRNA_3/abundance.tsv"   "v89/STAT5B_siRNA_3/gene_expr.csv"  
## [253] "v90/Control_siRNA_1/abundance.tsv"  "v90/Control_siRNA_1/gene_expr.csv" 
## [255] "v90/Control_siRNA_2/abundance.tsv"  "v90/Control_siRNA_2/gene_expr.csv" 
## [257] "v90/Control_siRNA_3/abundance.tsv"  "v90/Control_siRNA_3/gene_expr.csv" 
## [259] "v90/STAT5A_siRNA_1/abundance.tsv"   "v90/STAT5A_siRNA_1/gene_expr.csv"  
## [261] "v90/STAT5A_siRNA_2/abundance.tsv"   "v90/STAT5A_siRNA_2/gene_expr.csv"  
## [263] "v90/STAT5A_siRNA_3/abundance.tsv"   "v90/STAT5A_siRNA_3/gene_expr.csv"  
## [265] "v90/STAT5B_siRNA_1/abundance.tsv"   "v90/STAT5B_siRNA_1/gene_expr.csv"  
## [267] "v90/STAT5B_siRNA_2/abundance.tsv"   "v90/STAT5B_siRNA_2/gene_expr.csv"  
## [269] "v90/STAT5B_siRNA_3/abundance.tsv"   "v90/STAT5B_siRNA_3/gene_expr.csv"  
## [271] "v91/Control_siRNA_1/abundance.tsv"  "v91/Control_siRNA_1/gene_expr.csv" 
## [273] "v91/Control_siRNA_2/abundance.tsv"  "v91/Control_siRNA_2/gene_expr.csv" 
## [275] "v91/Control_siRNA_3/abundance.tsv"  "v91/Control_siRNA_3/gene_expr.csv" 
## [277] "v91/STAT5A_siRNA_1/abundance.tsv"   "v91/STAT5A_siRNA_1/gene_expr.csv"  
## [279] "v91/STAT5A_siRNA_2/abundance.tsv"   "v91/STAT5A_siRNA_2/gene_expr.csv"  
## [281] "v91/STAT5A_siRNA_3/abundance.tsv"   "v91/STAT5A_siRNA_3/gene_expr.csv"  
## [283] "v91/STAT5B_siRNA_1/abundance.tsv"   "v91/STAT5B_siRNA_1/gene_expr.csv"  
## [285] "v91/STAT5B_siRNA_2/abundance.tsv"   "v91/STAT5B_siRNA_2/gene_expr.csv"  
## [287] "v91/STAT5B_siRNA_3/abundance.tsv"   "v91/STAT5B_siRNA_3/gene_expr.csv"  
## [289] "v92/Control_siRNA_1/abundance.tsv"  "v92/Control_siRNA_1/gene_expr.csv" 
## [291] "v92/Control_siRNA_2/abundance.tsv"  "v92/Control_siRNA_2/gene_expr.csv" 
## [293] "v92/Control_siRNA_3/abundance.tsv"  "v92/Control_siRNA_3/gene_expr.csv" 
## [295] "v92/STAT5A_siRNA_1/abundance.tsv"   "v92/STAT5A_siRNA_1/gene_expr.csv"  
## [297] "v92/STAT5A_siRNA_2/abundance.tsv"   "v92/STAT5A_siRNA_2/gene_expr.csv"  
## [299] "v92/STAT5A_siRNA_3/abundance.tsv"   "v92/STAT5A_siRNA_3/gene_expr.csv"  
## [301] "v92/STAT5B_siRNA_1/abundance.tsv"   "v92/STAT5B_siRNA_1/gene_expr.csv"  
## [303] "v92/STAT5B_siRNA_2/abundance.tsv"   "v92/STAT5B_siRNA_2/gene_expr.csv"  
## [305] "v92/STAT5B_siRNA_3/abundance.tsv"   "v92/STAT5B_siRNA_3/gene_expr.csv"  
## [307] "v93/Control_siRNA_1/abundance.tsv"  "v93/Control_siRNA_1/gene_expr.csv" 
## [309] "v93/Control_siRNA_2/abundance.tsv"  "v93/Control_siRNA_2/gene_expr.csv" 
## [311] "v93/Control_siRNA_3/abundance.tsv"  "v93/Control_siRNA_3/gene_expr.csv" 
## [313] "v93/STAT5A_siRNA_1/abundance.tsv"   "v93/STAT5A_siRNA_1/gene_expr.csv"  
## [315] "v93/STAT5A_siRNA_2/abundance.tsv"   "v93/STAT5A_siRNA_2/gene_expr.csv"  
## [317] "v93/STAT5A_siRNA_3/abundance.tsv"   "v93/STAT5A_siRNA_3/gene_expr.csv"  
## [319] "v93/STAT5B_siRNA_1/abundance.tsv"   "v93/STAT5B_siRNA_1/gene_expr.csv"  
## [321] "v93/STAT5B_siRNA_2/abundance.tsv"   "v93/STAT5B_siRNA_2/gene_expr.csv"  
## [323] "v93/STAT5B_siRNA_3/abundance.tsv"   "v93/STAT5B_siRNA_3/gene_expr.csv"  
## [325] "v94/Control_siRNA_1/abundance.tsv"  "v94/Control_siRNA_1/gene_expr.csv" 
## [327] "v94/Control_siRNA_2/abundance.tsv"  "v94/Control_siRNA_2/gene_expr.csv" 
## [329] "v94/Control_siRNA_3/abundance.tsv"  "v94/Control_siRNA_3/gene_expr.csv" 
## [331] "v94/STAT5A_siRNA_1/abundance.tsv"   "v94/STAT5A_siRNA_1/gene_expr.csv"  
## [333] "v94/STAT5A_siRNA_2/abundance.tsv"   "v94/STAT5A_siRNA_2/gene_expr.csv"  
## [335] "v94/STAT5A_siRNA_3/abundance.tsv"   "v94/STAT5A_siRNA_3/gene_expr.csv"  
## [337] "v94/STAT5B_siRNA_1/abundance.tsv"   "v94/STAT5B_siRNA_1/gene_expr.csv"  
## [339] "v94/STAT5B_siRNA_2/abundance.tsv"   "v94/STAT5B_siRNA_2/gene_expr.csv"  
## [341] "v94/STAT5B_siRNA_3/abundance.tsv"   "v94/STAT5B_siRNA_3/gene_expr.csv"  
## [343] "v95/Control_siRNA_1/abundance.tsv"  "v95/Control_siRNA_1/gene_expr.csv" 
## [345] "v95/Control_siRNA_2/abundance.tsv"  "v95/Control_siRNA_2/gene_expr.csv" 
## [347] "v95/Control_siRNA_3/abundance.tsv"  "v95/Control_siRNA_3/gene_expr.csv" 
## [349] "v95/STAT5A_siRNA_1/abundance.tsv"   "v95/STAT5A_siRNA_1/gene_expr.csv"  
## [351] "v95/STAT5A_siRNA_2/abundance.tsv"   "v95/STAT5A_siRNA_2/gene_expr.csv"  
## [353] "v95/STAT5A_siRNA_3/abundance.tsv"   "v95/STAT5A_siRNA_3/gene_expr.csv"  
## [355] "v95/STAT5B_siRNA_1/abundance.tsv"   "v95/STAT5B_siRNA_1/gene_expr.csv"  
## [357] "v95/STAT5B_siRNA_2/abundance.tsv"   "v95/STAT5B_siRNA_2/gene_expr.csv"  
## [359] "v95/STAT5B_siRNA_3/abundance.tsv"   "v95/STAT5B_siRNA_3/gene_expr.csv"  
## [361] "v96/Control_siRNA_1/abundance.tsv"  "v96/Control_siRNA_1/gene_expr.csv" 
## [363] "v96/Control_siRNA_2/abundance.tsv"  "v96/Control_siRNA_2/gene_expr.csv" 
## [365] "v96/Control_siRNA_3/abundance.tsv"  "v96/Control_siRNA_3/gene_expr.csv" 
## [367] "v96/STAT5A_siRNA_1/abundance.tsv"   "v96/STAT5A_siRNA_1/gene_expr.csv"  
## [369] "v96/STAT5A_siRNA_2/abundance.tsv"   "v96/STAT5A_siRNA_2/gene_expr.csv"  
## [371] "v96/STAT5A_siRNA_3/abundance.tsv"   "v96/STAT5A_siRNA_3/gene_expr.csv"  
## [373] "v96/STAT5B_siRNA_1/abundance.tsv"   "v96/STAT5B_siRNA_1/gene_expr.csv"  
## [375] "v96/STAT5B_siRNA_2/abundance.tsv"   "v96/STAT5B_siRNA_2/gene_expr.csv"  
## [377] "v96/STAT5B_siRNA_3/abundance.tsv"   "v96/STAT5B_siRNA_3/gene_expr.csv"  
## [379] "v97/Control_siRNA_1/abundance.tsv"  "v97/Control_siRNA_1/gene_expr.csv" 
## [381] "v97/Control_siRNA_2/abundance.tsv"  "v97/Control_siRNA_2/gene_expr.csv" 
## [383] "v97/Control_siRNA_3/abundance.tsv"  "v97/Control_siRNA_3/gene_expr.csv" 
## [385] "v97/STAT5A_siRNA_1/abundance.tsv"   "v97/STAT5A_siRNA_1/gene_expr.csv"  
## [387] "v97/STAT5A_siRNA_2/abundance.tsv"   "v97/STAT5A_siRNA_2/gene_expr.csv"  
## [389] "v97/STAT5A_siRNA_3/abundance.tsv"   "v97/STAT5A_siRNA_3/gene_expr.csv"  
## [391] "v97/STAT5B_siRNA_1/abundance.tsv"   "v97/STAT5B_siRNA_1/gene_expr.csv"  
## [393] "v97/STAT5B_siRNA_2/abundance.tsv"   "v97/STAT5B_siRNA_2/gene_expr.csv"  
## [395] "v97/STAT5B_siRNA_3/abundance.tsv"   "v97/STAT5B_siRNA_3/gene_expr.csv"  
## [397] "v98/Control_siRNA_1/abundance.tsv"  "v98/Control_siRNA_1/gene_expr.csv" 
## [399] "v98/Control_siRNA_2/abundance.tsv"  "v98/Control_siRNA_2/gene_expr.csv" 
## [401] "v98/Control_siRNA_3/abundance.tsv"  "v98/Control_siRNA_3/gene_expr.csv" 
## [403] "v98/STAT5A_siRNA_1/abundance.tsv"   "v98/STAT5A_siRNA_1/gene_expr.csv"  
## [405] "v98/STAT5A_siRNA_2/abundance.tsv"   "v98/STAT5A_siRNA_2/gene_expr.csv"  
## [407] "v98/STAT5A_siRNA_3/abundance.tsv"   "v98/STAT5A_siRNA_3/gene_expr.csv"  
## [409] "v98/STAT5B_siRNA_1/abundance.tsv"   "v98/STAT5B_siRNA_1/gene_expr.csv"  
## [411] "v98/STAT5B_siRNA_2/abundance.tsv"   "v98/STAT5B_siRNA_2/gene_expr.csv"  
## [413] "v98/STAT5B_siRNA_3/abundance.tsv"   "v98/STAT5B_siRNA_3/gene_expr.csv"  
## [415] "v99/Control_siRNA_1/abundance.tsv"  "v99/Control_siRNA_1/gene_expr.csv" 
## [417] "v99/Control_siRNA_2/abundance.tsv"  "v99/Control_siRNA_2/gene_expr.csv" 
## [419] "v99/Control_siRNA_3/abundance.tsv"  "v99/Control_siRNA_3/gene_expr.csv" 
## [421] "v99/STAT5A_siRNA_1/abundance.tsv"   "v99/STAT5A_siRNA_1/gene_expr.csv"  
## [423] "v99/STAT5A_siRNA_2/abundance.tsv"   "v99/STAT5A_siRNA_2/gene_expr.csv"  
## [425] "v99/STAT5A_siRNA_3/abundance.tsv"   "v99/STAT5A_siRNA_3/gene_expr.csv"  
## [427] "v99/STAT5B_siRNA_1/abundance.tsv"   "v99/STAT5B_siRNA_1/gene_expr.csv"  
## [429] "v99/STAT5B_siRNA_2/abundance.tsv"   "v99/STAT5B_siRNA_2/gene_expr.csv"  
## [431] "v99/STAT5B_siRNA_3/abundance.tsv"   "v99/STAT5B_siRNA_3/gene_expr.csv"
gene_counts_ctrl1_v86 <- read.csv("kallisto_results/v86/Control_siRNA_1/gene_expr.csv", sep = ";")
tx_counts_ctrl1_v86 <- read.csv("kallisto_results/v86/Control_siRNA_1/abundance.tsv", sep = "\t")

# View(gene_counts_ctrl1_v86)
# View(tx_counts_ctrl1_v86)


all_gene_files <- list.files("kallisto_results", pattern = "gene_expr.csv", 
                             recursive = TRUE, full.names = TRUE)
all_sample1_gene_files <- all_gene_files[grep("Control_siRNA_1", all_gene_files)]
all_sample1_gene_files
##  [1] "kallisto_results/v100/Control_siRNA_1/gene_expr.csv"
##  [2] "kallisto_results/v101/Control_siRNA_1/gene_expr.csv"
##  [3] "kallisto_results/v102/Control_siRNA_1/gene_expr.csv"
##  [4] "kallisto_results/v103/Control_siRNA_1/gene_expr.csv"
##  [5] "kallisto_results/v104/Control_siRNA_1/gene_expr.csv"
##  [6] "kallisto_results/v105/Control_siRNA_1/gene_expr.csv"
##  [7] "kallisto_results/v106/Control_siRNA_1/gene_expr.csv"
##  [8] "kallisto_results/v107/Control_siRNA_1/gene_expr.csv"
##  [9] "kallisto_results/v108/Control_siRNA_1/gene_expr.csv"
## [10] "kallisto_results/v109/Control_siRNA_1/gene_expr.csv"
## [11] "kallisto_results/v86/Control_siRNA_1/gene_expr.csv" 
## [12] "kallisto_results/v87/Control_siRNA_1/gene_expr.csv" 
## [13] "kallisto_results/v88/Control_siRNA_1/gene_expr.csv" 
## [14] "kallisto_results/v89/Control_siRNA_1/gene_expr.csv" 
## [15] "kallisto_results/v90/Control_siRNA_1/gene_expr.csv" 
## [16] "kallisto_results/v91/Control_siRNA_1/gene_expr.csv" 
## [17] "kallisto_results/v92/Control_siRNA_1/gene_expr.csv" 
## [18] "kallisto_results/v93/Control_siRNA_1/gene_expr.csv" 
## [19] "kallisto_results/v94/Control_siRNA_1/gene_expr.csv" 
## [20] "kallisto_results/v95/Control_siRNA_1/gene_expr.csv" 
## [21] "kallisto_results/v96/Control_siRNA_1/gene_expr.csv" 
## [22] "kallisto_results/v97/Control_siRNA_1/gene_expr.csv" 
## [23] "kallisto_results/v98/Control_siRNA_1/gene_expr.csv" 
## [24] "kallisto_results/v99/Control_siRNA_1/gene_expr.csv"
all_sample1_gene_files <- all_sample1_gene_files[c(c(11:24), c(1:10))]
all_sample1_gene_files
##  [1] "kallisto_results/v86/Control_siRNA_1/gene_expr.csv" 
##  [2] "kallisto_results/v87/Control_siRNA_1/gene_expr.csv" 
##  [3] "kallisto_results/v88/Control_siRNA_1/gene_expr.csv" 
##  [4] "kallisto_results/v89/Control_siRNA_1/gene_expr.csv" 
##  [5] "kallisto_results/v90/Control_siRNA_1/gene_expr.csv" 
##  [6] "kallisto_results/v91/Control_siRNA_1/gene_expr.csv" 
##  [7] "kallisto_results/v92/Control_siRNA_1/gene_expr.csv" 
##  [8] "kallisto_results/v93/Control_siRNA_1/gene_expr.csv" 
##  [9] "kallisto_results/v94/Control_siRNA_1/gene_expr.csv" 
## [10] "kallisto_results/v95/Control_siRNA_1/gene_expr.csv" 
## [11] "kallisto_results/v96/Control_siRNA_1/gene_expr.csv" 
## [12] "kallisto_results/v97/Control_siRNA_1/gene_expr.csv" 
## [13] "kallisto_results/v98/Control_siRNA_1/gene_expr.csv" 
## [14] "kallisto_results/v99/Control_siRNA_1/gene_expr.csv" 
## [15] "kallisto_results/v100/Control_siRNA_1/gene_expr.csv"
## [16] "kallisto_results/v101/Control_siRNA_1/gene_expr.csv"
## [17] "kallisto_results/v102/Control_siRNA_1/gene_expr.csv"
## [18] "kallisto_results/v103/Control_siRNA_1/gene_expr.csv"
## [19] "kallisto_results/v104/Control_siRNA_1/gene_expr.csv"
## [20] "kallisto_results/v105/Control_siRNA_1/gene_expr.csv"
## [21] "kallisto_results/v106/Control_siRNA_1/gene_expr.csv"
## [22] "kallisto_results/v107/Control_siRNA_1/gene_expr.csv"
## [23] "kallisto_results/v108/Control_siRNA_1/gene_expr.csv"
## [24] "kallisto_results/v109/Control_siRNA_1/gene_expr.csv"
all_tx_files <-   list.files("kallisto_results", pattern = "abundance.tsv", 
                             recursive = TRUE, full.names = TRUE)
all_sample1_tx_files <- all_tx_files[grep("Control_siRNA_1", all_tx_files)]
all_sample1_tx_files
##  [1] "kallisto_results/v100/Control_siRNA_1/abundance.tsv"
##  [2] "kallisto_results/v101/Control_siRNA_1/abundance.tsv"
##  [3] "kallisto_results/v102/Control_siRNA_1/abundance.tsv"
##  [4] "kallisto_results/v103/Control_siRNA_1/abundance.tsv"
##  [5] "kallisto_results/v104/Control_siRNA_1/abundance.tsv"
##  [6] "kallisto_results/v105/Control_siRNA_1/abundance.tsv"
##  [7] "kallisto_results/v106/Control_siRNA_1/abundance.tsv"
##  [8] "kallisto_results/v107/Control_siRNA_1/abundance.tsv"
##  [9] "kallisto_results/v108/Control_siRNA_1/abundance.tsv"
## [10] "kallisto_results/v109/Control_siRNA_1/abundance.tsv"
## [11] "kallisto_results/v86/Control_siRNA_1/abundance.tsv" 
## [12] "kallisto_results/v87/Control_siRNA_1/abundance.tsv" 
## [13] "kallisto_results/v88/Control_siRNA_1/abundance.tsv" 
## [14] "kallisto_results/v89/Control_siRNA_1/abundance.tsv" 
## [15] "kallisto_results/v90/Control_siRNA_1/abundance.tsv" 
## [16] "kallisto_results/v91/Control_siRNA_1/abundance.tsv" 
## [17] "kallisto_results/v92/Control_siRNA_1/abundance.tsv" 
## [18] "kallisto_results/v93/Control_siRNA_1/abundance.tsv" 
## [19] "kallisto_results/v94/Control_siRNA_1/abundance.tsv" 
## [20] "kallisto_results/v95/Control_siRNA_1/abundance.tsv" 
## [21] "kallisto_results/v96/Control_siRNA_1/abundance.tsv" 
## [22] "kallisto_results/v97/Control_siRNA_1/abundance.tsv" 
## [23] "kallisto_results/v98/Control_siRNA_1/abundance.tsv" 
## [24] "kallisto_results/v99/Control_siRNA_1/abundance.tsv"
all_sample1_tx_files <- all_sample1_tx_files[c(c(11:24), c(1:10))]
all_sample1_tx_files
##  [1] "kallisto_results/v86/Control_siRNA_1/abundance.tsv" 
##  [2] "kallisto_results/v87/Control_siRNA_1/abundance.tsv" 
##  [3] "kallisto_results/v88/Control_siRNA_1/abundance.tsv" 
##  [4] "kallisto_results/v89/Control_siRNA_1/abundance.tsv" 
##  [5] "kallisto_results/v90/Control_siRNA_1/abundance.tsv" 
##  [6] "kallisto_results/v91/Control_siRNA_1/abundance.tsv" 
##  [7] "kallisto_results/v92/Control_siRNA_1/abundance.tsv" 
##  [8] "kallisto_results/v93/Control_siRNA_1/abundance.tsv" 
##  [9] "kallisto_results/v94/Control_siRNA_1/abundance.tsv" 
## [10] "kallisto_results/v95/Control_siRNA_1/abundance.tsv" 
## [11] "kallisto_results/v96/Control_siRNA_1/abundance.tsv" 
## [12] "kallisto_results/v97/Control_siRNA_1/abundance.tsv" 
## [13] "kallisto_results/v98/Control_siRNA_1/abundance.tsv" 
## [14] "kallisto_results/v99/Control_siRNA_1/abundance.tsv" 
## [15] "kallisto_results/v100/Control_siRNA_1/abundance.tsv"
## [16] "kallisto_results/v101/Control_siRNA_1/abundance.tsv"
## [17] "kallisto_results/v102/Control_siRNA_1/abundance.tsv"
## [18] "kallisto_results/v103/Control_siRNA_1/abundance.tsv"
## [19] "kallisto_results/v104/Control_siRNA_1/abundance.tsv"
## [20] "kallisto_results/v105/Control_siRNA_1/abundance.tsv"
## [21] "kallisto_results/v106/Control_siRNA_1/abundance.tsv"
## [22] "kallisto_results/v107/Control_siRNA_1/abundance.tsv"
## [23] "kallisto_results/v108/Control_siRNA_1/abundance.tsv"
## [24] "kallisto_results/v109/Control_siRNA_1/abundance.tsv"

1.1 Some quantifications of things over time

genes_over_time <- lapply(1:24, function(arg) {
  
  gene_file <- read.csv(all_sample1_gene_files[arg], sep = ";")
  n_genes <- nrow(gene_file)
  
  tx_file <- read.csv(all_sample1_tx_files[arg], sep = "\t")
  n_transcripts <- nrow(tx_file)
  
  gene_ids <- gene_file$gene_id
  gene_names <- gene_file$gene_symbol
  
  tx_ids <- tx_file$target_id
  
  out <- list(
    n_genes = n_genes,
    n_transcripts = n_transcripts,
    gene_ids = gene_ids,
    gene_names = gene_names,
    tx_ids = tx_ids
  )
})

names(genes_over_time) <- (list.files("kallisto_results", recursive = FALSE))[c(c(11:24), c(1:10))]

anns_df <- data.frame(
  version = factor(names(genes_over_time), levels = names(genes_over_time)),
  n_genes = sapply(genes_over_time, function(arg) arg$n_genes),
  n_transcripts = sapply(genes_over_time, function(arg) arg$n_transcripts)
)

anns_df
##      version n_genes n_transcripts
## v86      v86   34983        178136
## v87      v87   34983        178136
## v88      v88   34835        179973
## v89      v89   34835        179973
## v90      v90   34912        180869
## v91      v91   34912        180869
## v92      v92   35004        185299
## v93      v93   35004        185299
## v94      v94   35548        187626
## v95      v95   35548        187626
## v96      v96   35571        188753
## v97      v97   35593        189154
## v98      v98   35593        190069
## v99      v99   35604        190432
## v100    v100   35602        190522
## v101    v101   35598        191887
## v102    v102   35601        194360
## v103    v103   35602        196722
## v104    v104   35606        199240
## v105    v105   35623        202897
## v106    v106   35640        204563
## v107    v107   35632        207877
## v108    v108   35646        205131
## v109    v109   35658        205541

Plotting these things a bit

library("ggplot2")

ggplot(anns_df,
       aes(x = version, y = n_genes)) + 
  coord_cartesian(ylim = c(0, NA)) + 
  geom_point() +
  theme_bw() + 
  ggtitle("Number of genes over time", subtitle = "v86 to v109")

ggplot(anns_df,
       aes(x = version, y = n_genes)) + 
  geom_point() +
  theme_bw() + 
  ggtitle("Number of genes over time", subtitle = "v86 to v109")

ggplot(anns_df,
       aes(x = version, y = n_transcripts)) + 
  coord_cartesian(ylim = c(0, NA)) + 
  geom_point() +
  theme_bw() + 
  ggtitle("Number of transcripts over time", subtitle = "v86 to v109")

ggplot(anns_df,
       aes(x = version, y = n_transcripts)) + 
  geom_point() +
  theme_bw() + 
  ggtitle("Number of transcripts over time", subtitle = "v86 to v109")

Working a bit more on the “real content” of the genes included in the annotations

  • sampling over the whole period
gplots::venn(
  list(
    v86 = genes_over_time$v86$gene_ids,
    v94 = genes_over_time$v94$gene_ids,
    v102 = genes_over_time$v102$gene_ids,
    v109 = genes_over_time$v109$gene_ids
  )
)

gplots::venn(
  list(
    v86 = genes_over_time$v86$gene_names,
    v94 = genes_over_time$v94$gene_names,
    v102 = genes_over_time$v102$gene_names,
    v109 = genes_over_time$v109$gene_names
  )
)

  • sampling a little bit more close over time
gplots::venn(
  list(
    v86 = genes_over_time$v86$gene_ids,
    v87 = genes_over_time$v87$gene_ids,
    v88 = genes_over_time$v88$gene_ids,
    v89 = genes_over_time$v89$gene_ids
  )
)

Let’s take this a little bit next level, with an Upset plot

UpSetR::upset(
  UpSetR::fromList(
    list(
      v86 = genes_over_time$v86$gene_ids,
      v94 = genes_over_time$v94$gene_ids,
      v102 = genes_over_time$v102$gene_ids,
      v109 = genes_over_time$v109$gene_ids
    )
  )
)

UpSetR::upset(
  UpSetR::fromList(
    list(
      v86 = genes_over_time$v86$gene_ids,
      v88 = genes_over_time$v88$gene_ids,
      v90 = genes_over_time$v90$gene_ids,
      v92 = genes_over_time$v92$gene_ids,
      v94 = genes_over_time$v94$gene_ids,
      v96 = genes_over_time$v96$gene_ids,
      v98 = genes_over_time$v98$gene_ids,
      v100 = genes_over_time$v100$gene_ids,
      v102 = genes_over_time$v102$gene_ids,
      v104 = genes_over_time$v104$gene_ids,
      v106 = genes_over_time$v106$gene_ids,
      v109 = genes_over_time$v109$gene_ids
    )
  ), nintersects = NA, nsets = 12
)

2 DE over time

DE with oldest version

all_samples_v86 <- file.path(
  list.files(path = "kallisto_results/v86", 
             full.names = TRUE), 
  "gene_expr.csv")

file.exists(all_samples_v86)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
cm_v86 <- data.frame(
  ctrl_r1 =   read.csv(all_samples_v86[1], sep = ";")$count,
  ctrl_r2 =   read.csv(all_samples_v86[2], sep = ";")$count,
  ctrl_r3 =   read.csv(all_samples_v86[3], sep = ";")$count,
  stat5a_r1 = read.csv(all_samples_v86[4], sep = ";")$count,
  stat5a_r2 = read.csv(all_samples_v86[5], sep = ";")$count,
  stat5a_r3 = read.csv(all_samples_v86[6], sep = ";")$count,
  stat5b_r1 = read.csv(all_samples_v86[7], sep = ";")$count,
  stat5b_r2 = read.csv(all_samples_v86[8], sep = ";")$count,
  stat5b_r3 = read.csv(all_samples_v86[9], sep = ";")$count,
  row.names = read.csv(all_samples_v86[1], sep = ";")$gene_id
)

anno_v86 <- data.frame(
  gene_id = read.csv(all_samples_v86[1], sep = ";")$gene_id,
  gene_name = read.csv(all_samples_v86[1], sep = ";")$gene_symbol,
  row.names = read.csv(all_samples_v86[1], sep = ";")$gene_id
)

head(cm_v86)
##                 ctrl_r1 ctrl_r2 ctrl_r3 stat5a_r1 stat5a_r2 stat5a_r3 stat5b_r1
## ENSG00000000003       4       3       0         5         2         7         1
## ENSG00000000005       0       0       0         0         0         0         0
## ENSG00000000419       0       1       0         2         1         0         0
## ENSG00000000457     295     216     250       282       390       333       246
## ENSG00000000460     140     116     125        99       107        83       137
## ENSG00000000938   10111    7849    9162     13445     12557     10639     10144
##                 stat5b_r2 stat5b_r3
## ENSG00000000003         2         2
## ENSG00000000005         0         0
## ENSG00000000419         0         0
## ENSG00000000457       246       309
## ENSG00000000460       157       106
## ENSG00000000938     10895     10169
dim(cm_v86)
## [1] 34983     9
head(anno_v86)
##                         gene_id gene_name
## ENSG00000000003 ENSG00000000003    TSPAN6
## ENSG00000000005 ENSG00000000005      TNMD
## ENSG00000000419 ENSG00000000419      DPM1
## ENSG00000000457 ENSG00000000457     SCYL3
## ENSG00000000460 ENSG00000000460  C1orf112
## ENSG00000000938 ENSG00000000938       FGR
samples_metadata <- data.frame(
  group = c("ctrl", "ctrl", "ctrl", 
            "stat5a", "stat5a", "stat5a", 
            "stat5b", "stat5b", "stat5b"),
  id = list.files(path = "kallisto_results/v86")
)

library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
dds_v86 <- DESeq2::DESeqDataSetFromMatrix(
  countData = as.matrix(cm_v86),
  colData = samples_metadata,
  design = ~group
)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
dds_v86
## class: DESeqDataSet 
## dim: 34983 9 
## metadata(1): version
## assays(1): counts
## rownames(34983): ENSG00000000003 ENSG00000000005 ... ENSG00000283697
##   ENSG00000283698
## rowData names(0):
## colnames(9): ctrl_r1 ctrl_r2 ... stat5b_r2 stat5b_r3
## colData names(2): group id
rowData(dds_v86) <- anno_v86

pcaExplorer::pcaplot(vst(dds_v86), intgroup = "group", ellipse = FALSE)
## 
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

dds_v86 <- DESeq(dds_v86)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## estimating size factors
## estimating dispersions
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## gene-wise dispersion estimates
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## mean-dispersion relationship
## final dispersion estimates
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## fitting model and testing
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
resultsNames(dds_v86)
## [1] "Intercept"            "group_stat5a_vs_ctrl" "group_stat5b_vs_ctrl"
summary(results(dds_v86, name = "group_stat5a_vs_ctrl"), alpha = 0.05)
## 
## out of 14103 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 380, 2.7%
## LFC < 0 (down)     : 285, 2%
## outliers [1]       : 1, 0.0071%
## low counts [2]     : 10589, 75%
## (mean count < 24)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

DE with latest version

all_samples_v109 <- file.path(
  list.files(path = "kallisto_results/v109", 
             full.names = TRUE), 
  "gene_expr.csv")

file.exists(all_samples_v109)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
cm_v109 <- data.frame(
  ctrl_r1 =   read.csv(all_samples_v109[1], sep = ";")$count,
  ctrl_r2 =   read.csv(all_samples_v109[2], sep = ";")$count,
  ctrl_r3 =   read.csv(all_samples_v109[3], sep = ";")$count,
  stat5a_r1 = read.csv(all_samples_v109[4], sep = ";")$count,
  stat5a_r2 = read.csv(all_samples_v109[5], sep = ";")$count,
  stat5a_r3 = read.csv(all_samples_v109[6], sep = ";")$count,
  stat5b_r1 = read.csv(all_samples_v109[7], sep = ";")$count,
  stat5b_r2 = read.csv(all_samples_v109[8], sep = ";")$count,
  stat5b_r3 = read.csv(all_samples_v109[9], sep = ";")$count,
  row.names = read.csv(all_samples_v109[1], sep = ";")$gene_id
)

anno_v109 <- data.frame(
  gene_id = read.csv(all_samples_v109[1], sep = ";")$gene_id,
  gene_name = read.csv(all_samples_v109[1], sep = ";")$gene_symbol,
  row.names = read.csv(all_samples_v109[1], sep = ";")$gene_id
)

dim(cm_v109)
## [1] 35658     9
head(cm_v109)
##                 ctrl_r1 ctrl_r2 ctrl_r3 stat5a_r1 stat5a_r2 stat5a_r3 stat5b_r1
## ENSG00000000003       6       3       0         4         2         3         1
## ENSG00000000005       0       0       0         0         0         0         0
## ENSG00000000419      21      13      18        12        25        27        13
## ENSG00000000457     294     214     249       275       390       331       244
## ENSG00000000460     137     110     117        92       102        80       121
## ENSG00000000938   10109    7849    9157     13440     12554     10639     10137
##                 stat5b_r2 stat5b_r3
## ENSG00000000003         1         2
## ENSG00000000005         0         0
## ENSG00000000419        15        13
## ENSG00000000457       242       305
## ENSG00000000460       144       104
## ENSG00000000938     10890     10165
head(anno_v109)
##                         gene_id gene_name
## ENSG00000000003 ENSG00000000003    TSPAN6
## ENSG00000000005 ENSG00000000005      TNMD
## ENSG00000000419 ENSG00000000419      DPM1
## ENSG00000000457 ENSG00000000457     SCYL3
## ENSG00000000460 ENSG00000000460  C1orf112
## ENSG00000000938 ENSG00000000938       FGR
dds_v109 <- DESeq2::DESeqDataSetFromMatrix(
  countData = as.matrix(cm_v109),
  colData = samples_metadata,
  design = ~group
)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
dds_v109
## class: DESeqDataSet 
## dim: 35658 9 
## metadata(1): version
## assays(1): counts
## rownames(35658): ENSG00000000003 ENSG00000000005 ... ENSG00000291316
##   ENSG00000291317
## rowData names(0):
## colnames(9): ctrl_r1 ctrl_r2 ... stat5b_r2 stat5b_r3
## colData names(2): group id
rowData(dds_v109) <- anno_v109

pcaExplorer::pcaplot(vst(dds_v109), intgroup = "group", ellipse = FALSE)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

dds_v109 <- DESeq(dds_v109)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## estimating size factors
## estimating dispersions
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## gene-wise dispersion estimates
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## mean-dispersion relationship
## final dispersion estimates
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## fitting model and testing
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
resultsNames(dds_v109)
## [1] "Intercept"            "group_stat5a_vs_ctrl" "group_stat5b_vs_ctrl"
summary(results(dds_v109, name = "group_stat5a_vs_ctrl"), alpha = 0.05)
## 
## out of 14490 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 367, 2.5%
## LFC < 0 (down)     : 270, 1.9%
## outliers [1]       : 2, 0.014%
## low counts [2]     : 9595, 66%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
library("patchwork")

pcaExplorer::pcaplot(vst(dds_v86), intgroup = "group", ellipse = FALSE, ntop = 500) |
  pcaExplorer::pcaplot(vst(dds_v109), intgroup = "group", ellipse = FALSE, ntop = 500)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

pcaExplorer::pcaplot(vst(dds_v86), intgroup = "group", ellipse = FALSE, ntop = 2000) |
  pcaExplorer::pcaplot(vst(dds_v109), intgroup = "group", ellipse = FALSE, ntop = 2000)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

pcaExplorer::pcaplot(vst(dds_v86), intgroup = "group", ellipse = FALSE, ntop = 20000) |
  pcaExplorer::pcaplot(vst(dds_v109), intgroup = "group", ellipse = FALSE, ntop = 20000)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

res_v86 <- results(dds_v86, name = "group_stat5a_vs_ctrl")
res_v109 <- results(dds_v109, name = "group_stat5a_vs_ctrl")

3 What about the functional level?

We will do some enrichment analyses on the sets above

library("mosdef")
library("org.Hs.eg.db")
## Loading required package: AnnotationDbi
## 
res_enrich_v86 <- mosdef::run_cluPro(
  dds = dds_v86,
  res_de = res_v86,
  mapping = "org.Hs.eg.db", 
  ont = "BP"
)
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## 'select()' returned 1:many mapping between keys and columns
## Your dataset has 665 DE genes. You selected 665 (100.00%) genes. You analysed all up_and_down-regulated genes
DT::datatable(as.data.frame(res_enrich_v86))
res_enrich_v109 <- mosdef::run_cluPro(
  dds = dds_v109,
  res_de = res_v109,
  mapping = "org.Hs.eg.db",
  ont = "BP"
)
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## 'select()' returned 1:many mapping between keys and columns
## Your dataset has 637 DE genes. You selected 637 (100.00%) genes. You analysed all up_and_down-regulated genes
DT::datatable(as.data.frame(res_enrich_v109))

Using topGO to try and obtain more targeted and focused categories

library("topGO")
## Loading required package: graph
## Loading required package: GO.db
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
res_topgo_v86 <- mosdef::run_topGO(
  dds = dds_v86,
  res_de = res_v86,
  mapping = "org.Hs.eg.db", 
  ont = "BP"
)
## 'select()' returned 1:many mapping between keys and columns
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## 'select()' returned 1:many mapping between keys and columns
## Your dataset has 665 DE genes. You selected 665 (100.00%) genes. You analysed all up_and_down-regulated genes
## 5233 GO terms were analyzed. Not all of them are significantly enriched.
## We suggest further subsetting the output list by for example: 
## using a pvalue cutoff in the column: 
## 'p.value_elim'.
res_topgo_v109 <- mosdef::run_topGO(
  dds = dds_v109,
  res_de = res_v109,
  mapping = "org.Hs.eg.db",
  ont = "BP"
)
## 'select()' returned 1:many mapping between keys and columns
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## 'select()' returned 1:many mapping between keys and columns
## Your dataset has 637 DE genes. You selected 637 (100.00%) genes. You analysed all up_and_down-regulated genes
## 5421 GO terms were analyzed. Not all of them are significantly enriched.
## We suggest further subsetting the output list by for example: 
## using a pvalue cutoff in the column: 
## 'p.value_elim'.
DT::datatable(as.data.frame(res_topgo_v86))
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
DT::datatable(as.data.frame(res_topgo_v109))
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Might be a bit brutal, but let’s try

merged_go_df <- merge(
  as.data.frame(res_topgo_v86),
  as.data.frame(res_topgo_v109), 
  by = "GO.ID"
)

plot(merged_go_df$p.value_elim.x, 
     merged_go_df$p.value_elim.y, 
     log = "xy")
abline(a = 0, b = 1)

Building up some GeneTonicList object for easy exploration

library("GeneTonic")
## 
## Attaching package: 'GeneTonic'
## The following objects are masked from 'package:mosdef':
## 
##     deseqresult2df, gene_plot, geneinfo_2_html, go_2_html, map2color,
##     styleColorBar_divergent
gtl_v86 <- GeneTonicList(
  dds_v86,
  res_v86,
  shake_enrichResult(res_enrich_v86),
  annotation_obj = anno_v86
)
## Found 4036 gene sets in `enrichResult` object, of which 246 are significant.
## Converting for usage in GeneTonic...
## ---------------------------------
## ----- GeneTonicList object ------
## ---------------------------------
## 
## ----- dds object -----
## Providing an expression object (as DESeqDataset) of 34983 features over 9 samples
## 
## ----- res_de object -----
## Providing a DE result object (as DESeqResults), 34983 features tested, 665 found as DE
## Upregulated:     380
## Downregulated:   285
## 
## ----- res_enrich object -----
## Providing an enrichment result object, 4036 reported
## 
## ----- annotation_obj object -----
## Providing an annotation object of 34983 features with information on 2 identifier types
gtl_v109 <- GeneTonicList(
  dds_v109,
  res_v109,
  shake_enrichResult(res_enrich_v109),
  annotation_obj = anno_v109
)
## Found 4096 gene sets in `enrichResult` object, of which 286 are significant.
## Converting for usage in GeneTonic...
## ---------------------------------
## ----- GeneTonicList object ------
## ---------------------------------
## 
## ----- dds object -----
## Providing an expression object (as DESeqDataset) of 35658 features over 9 samples
## 
## ----- res_de object -----
## Providing a DE result object (as DESeqResults), 35658 features tested, 637 found as DE
## Upregulated:     367
## Downregulated:   270
## 
## ----- res_enrich object -----
## Providing an enrichment result object, 4096 reported
## 
## ----- annotation_obj object -----
## Providing an annotation object of 35658 features with information on 2 identifier types

A more quanti-quali tative approach would be with a representative heatmap

vst_v86 <- vst(dds_v86)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
## Warning in as.data.frame.numeric(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
vst_v109 <- vst(dds_v109)
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead
GeneTonic::gs_heatmap(vst_v86,
                      gtl = gtl_v86,
                      geneset_id = "GO:0031640", 
                      anno_col_info = "group")
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

GeneTonic::gs_heatmap(vst_v109,
                      gtl = gtl_v109,
                      geneset_id = "GO:0031640", 
                      anno_col_info = "group")
## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.factor()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

## Warning in as.data.frame.factor(col, optional = optional): Direct call of
## 'as.data.frame.numeric()' is deprecated.  Use 'as.data.frame.vector()' or
## 'as.data.frame()' instead

Session info

sessionInfo()
## R Under development (unstable) (2024-03-12 r86109)
## Platform: x86_64-apple-darwin20
## Running under: macOS Monterey 12.7.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] GeneTonic_2.7.2             topGO_2.55.0               
##  [3] SparseM_1.81                GO.db_3.18.0               
##  [5] graph_1.81.0                org.Hs.eg.db_3.18.0        
##  [7] AnnotationDbi_1.65.2        mosdef_0.99.0              
##  [9] patchwork_1.2.0             bigmemory_4.6.4            
## [11] DESeq2_1.43.4               SummarizedExperiment_1.33.3
## [13] Biobase_2.63.0              MatrixGenerics_1.15.0      
## [15] matrixStats_1.2.0           GenomicRanges_1.55.3       
## [17] GenomeInfoDb_1.39.9         IRanges_2.37.1             
## [19] S4Vectors_0.41.4            BiocGenerics_0.49.1        
## [21] ggplot2_3.5.0              
## 
## loaded via a namespace (and not attached):
##   [1] GSEABase_1.65.1          progress_1.2.3           DT_0.32                 
##   [4] Biostrings_2.71.4        vctrs_0.6.5              digest_0.6.35           
##   [7] png_0.1-8                shape_1.4.6.1            shinyBS_0.61.1          
##  [10] registry_0.5-1           ggrepel_0.9.5            magick_2.8.3            
##  [13] MASS_7.3-60.2            reshape2_1.4.4           httpuv_1.6.14           
##  [16] foreach_1.5.2            qvalue_2.35.0            withr_3.0.0             
##  [19] xfun_0.42                ggfun_0.1.4              ellipsis_0.3.2          
##  [22] survival_3.5-8           memoise_2.0.1            clusterProfiler_4.11.0  
##  [25] gson_0.1.0               BiasedUrn_2.0.11         tidytree_0.4.6          
##  [28] GlobalOptions_0.1.2      gtools_3.9.5             prettyunits_1.2.0       
##  [31] KEGGREST_1.43.0          promises_1.2.1           httr_1.4.7              
##  [34] restfulr_0.0.15          rstudioapi_0.15.0        shinyAce_0.4.2          
##  [37] miniUI_0.1.1.1           generics_0.1.3           DOSE_3.29.2             
##  [40] base64enc_0.1-3          curl_5.2.1               zlibbioc_1.49.3         
##  [43] ggraph_2.2.1             polyclip_1.10-6          ca_0.71.1               
##  [46] GenomeInfoDbData_1.2.11  SparseArray_1.3.4        RBGL_1.79.0             
##  [49] threejs_0.3.3            xtable_1.8-4             stringr_1.5.1           
##  [52] doParallel_1.0.17        evaluate_0.23            S4Arrays_1.3.6          
##  [55] BiocFileCache_2.11.1     hms_1.1.3                colorspace_2.1-0        
##  [58] filelock_1.0.3           visNetwork_2.1.2         pcaExplorer_2.29.0      
##  [61] shinyWidgets_0.8.2       magrittr_2.0.3           Rgraphviz_2.47.0        
##  [64] later_1.3.2              viridis_0.6.5            ggtree_3.11.1           
##  [67] lattice_0.22-5           NMF_0.27                 genefilter_1.85.1       
##  [70] XML_3.99-0.16.1          shadowtext_0.1.3         cowplot_1.1.3           
##  [73] pillar_1.9.0             nlme_3.1-164             iterators_1.0.14        
##  [76] gridBase_0.4-7           caTools_1.18.2           compiler_4.4.0          
##  [79] stringi_1.8.3            shinycssloaders_1.0.0    Category_2.69.0         
##  [82] TSP_1.2-4                dendextend_1.17.1        GenomicAlignments_1.39.4
##  [85] plyr_1.8.9               crayon_1.5.2             abind_1.4-5             
##  [88] BiocIO_1.13.0            gridGraphics_0.5-1       locfit_1.5-9.9          
##  [91] graphlayouts_1.1.1       bit_4.0.5                UpSetR_1.4.0            
##  [94] dplyr_1.1.4              fastmatch_1.1-4          codetools_0.2-19        
##  [97] crosstalk_1.2.1          bslib_0.6.1              GetoptLong_1.0.5        
## [100] plotly_4.10.4            mime_0.12                splines_4.4.0           
## [103] circlize_0.4.16          Rcpp_1.0.12              dbplyr_2.4.0            
## [106] tippy_0.1.0              HDO.db_0.99.1            knitr_1.45              
## [109] blob_1.2.4               utf8_1.2.4               clue_0.3-65             
## [112] BiocVersion_3.19.1       fs_1.6.3                 backbone_2.1.3          
## [115] expm_0.999-9             ggplotify_0.1.2          tibble_3.2.1            
## [118] Matrix_1.6-5             statmod_1.5.0            tweenr_2.0.3            
## [121] pkgconfig_2.0.3          pheatmap_1.0.12          tools_4.4.0             
## [124] cachem_1.0.8             RSQLite_2.3.5            viridisLite_0.4.2       
## [127] DBI_1.2.2                fastmap_1.1.1            rmarkdown_2.26          
## [130] scales_1.3.0             grid_4.4.0               shinydashboard_0.7.2    
## [133] Rsamtools_2.19.3         AnnotationHub_3.11.1     sass_0.4.8              
## [136] BiocManager_1.30.22      farver_2.1.1             tidygraph_1.3.1         
## [139] scatterpie_0.2.1         mgcv_1.9-1               yaml_2.3.8              
## [142] AnnotationForge_1.45.0   rtracklayer_1.63.1       cli_3.6.2               
## [145] purrr_1.0.2              webshot_0.5.5            lifecycle_1.0.4         
## [148] rintrojs_0.3.4           BiocParallel_1.37.1      annotate_1.81.2         
## [151] gtable_0.3.4             rjson_0.2.21             ggridges_0.5.6          
## [154] parallel_4.4.0           ape_5.7-1                limma_3.59.5            
## [157] jsonlite_1.8.8           colourpicker_1.3.0       seriation_1.5.4         
## [160] bitops_1.0-7             bigmemory.sri_0.1.8      bit64_4.0.5             
## [163] assertthat_0.2.1         yulab.utils_0.1.4        heatmaply_1.5.0         
## [166] geneLenDataBase_1.39.0   bs4Dash_2.3.3            jquerylib_0.1.4         
## [169] highr_0.10               GOSemSim_2.29.1          lazyeval_0.2.2          
## [172] shiny_1.8.0              dynamicTreeCut_1.63-1    htmltools_0.5.7         
## [175] enrichplot_1.23.1        rappdirs_0.3.3           glue_1.7.0              
## [178] httr2_1.0.0              XVector_0.43.1           RCurl_1.98-1.14         
## [181] treeio_1.27.0            ComplexUpset_1.3.3       gridExtra_2.3           
## [184] igraph_2.0.3             R6_2.5.1                 tidyr_1.3.1             
## [187] gplots_3.1.3.1           labeling_0.4.3           GenomicFeatures_1.55.4  
## [190] cluster_2.1.6            rngtools_1.5.2           aplot_0.2.2             
## [193] DelayedArray_0.29.9      tidyselect_1.2.1         GOstats_2.69.0          
## [196] ggforce_0.4.2            xml2_1.3.6               munsell_0.5.0           
## [199] KernSmooth_2.23-22       goseq_1.55.0             data.table_1.15.2       
## [202] htmlwidgets_1.6.4        fgsea_1.29.0             ComplexHeatmap_2.19.0   
## [205] RColorBrewer_1.1-3       biomaRt_2.59.1           rlang_1.1.3             
## [208] uuid_1.2-0               fansi_1.0.6              Cairo_1.6-2